Read in the data

library(scran)
library(scater)
library(DropletUtils)
library(openxlsx)
library(Rtsne)
library(umap)
library(RColorBrewer)
source("~/Dropbox/Postdoc/git/BE2020/Analysis/Functions/auxiliary.R")

Batch correction across tissues

sce.list <- readRDS("~/Dropbox/Postdoc/2019-12-29_BE2020/All_sce_filtered_high_quality.rds")

sce.list <- lapply(sce.list, function(n){
  rowData(n) <- rowData(n)[,1:2]
  n
})


# Order sce objects for batch correction
sce.list <- sce.list[order(unlist(lapply(sce.list, ncol)), decreasing = TRUE)]

n <- names(sce.list)
sce.list <- sce.list[c(which(grepl("NSCJ", n)), which(grepl("BSCJ", n)), 
                       which(grepl("NE", n)), which(grepl("NG", n)),
                       which(grepl("BE", n)), which(grepl("ND", n)),
                       which(grepl("SMG", n)))]


# Combine sce objects
sce <- do.call("cbind", sce.list)

# Batch correction
corrected <- batch.correction(sce.list)

# Save batch corrected counts in metdata
metadata(sce)$corrected <- corrected

# Compute tsne on corrected counts
set.seed(1234)
tsne <- Rtsne(t(corrected), pca = FALSE)
umap <- umap(t(corrected))

# Store tsne in slot
reducedDims(sce)$TSNE <- tsne$Y[,1:2]
reducedDims(sce)$umap <- umap$layout[,1:2]

# Clustering on corrected data
g <- buildSNNGraph(corrected, k = 10)
clusters <- igraph::cluster_louvain(g)$membership

# Save clustering in new slot
colData(sce)$Global_cluster <- clusters

# Visualize clustering
ggplot(data.frame(tSNE1 = tsne$Y[,1],
                  tSNE2 = tsne$Y[,2],
                  clusters = as.factor(clusters))) + 
  geom_point(aes(tSNE1, tSNE2, colour = clusters), size = 0.5)

# Tissue
ggplot(data.frame(tSNE1 = tsne$Y[,1],
                  tSNE2 = tsne$Y[,2],
                  tissue = as.factor(colData(sce)$Tissue))) + 
  geom_point(aes(tSNE1, tSNE2, colour = tissue), size = 0.5) + 
  scale_color_brewer(palette = "Set1")

ggplot(data.frame(UMAP1 = umap$layout[,1],
                  UMAP2 = umap$layout[,2],
                  tissue = as.factor(colData(sce)$Tissue))) + 
  geom_point(aes(UMAP1, UMAP2, colour = tissue), size = 0.5) + 
  scale_color_brewer(palette = "Set1")

# Patient
ggplot(data.frame(tSNE1 = tsne$Y[,1],
                  tSNE2 = tsne$Y[,2],
                  patient = as.factor(colData(sce)$Patient))) + 
  geom_point(aes(tSNE1, tSNE2, colour = patient), size = 0.5) + 
  scale_color_manual(values = c(brewer.pal(8, "Set1"), brewer.pal(8, "Set3")))

ggplot(data.frame(UMAP1 = umap$layout[,1],
                  UMAP2 = umap$layout[,2],
                  patient = as.factor(colData(sce)$Patient))) + 
  geom_point(aes(UMAP1, UMAP2, colour = patient), size = 0.5) + 
  scale_color_manual(values = c(brewer.pal(8, "Set1"), brewer.pal(8, "Set3")))

# Patient and tissue
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))

ggplot(data.frame(tSNE1 = tsne$Y[,1],
                  tSNE2 = tsne$Y[,2],
                  all = as.factor(paste(colData(sce)$Tissue, colData(sce)$Patient)))) + 
  geom_point(aes(tSNE1, tSNE2, colour = all), size = 0.5) + 
  scale_color_manual(values = col_vector)

ggplot(data.frame(UMAP1 = umap$layout[,1],
                  UMAP2 = umap$layout[,2],
                  all = as.factor(paste(colData(sce)$Tissue, colData(sce)$Patient)))) + 
  geom_point(aes(UMAP1, UMAP2, colour = all), size = 0.5) + 
  scale_color_manual(values = col_vector)

# Plot all patients individually 
# Generate patient colour vector
patient_vector <- c(brewer.pal(8, "Set1"), brewer.pal(8, "Set3"))[1:length(unique(sce$Patient))]
names(patient_vector) <- unique(sce$Patient)

for(i in c("NE", "NSCJ", "BSCJ", "BE", "NG", "ND", "SMG")){
  cur_tsne <- tsne$Y[sce$Tissue == i,]
  cur_patient <- sce$Patient[sce$Tissue == i]
  
  print(ggplot(data.frame(tSNE1 = cur_tsne[,1],
                          tSNE2 = cur_tsne[,2],
                          patient = cur_patient)) + 
          geom_point(aes(tSNE1, tSNE2, colour = patient), size = 0.5) + 
          scale_color_manual(values = patient_vector) + ggtitle(i))
}

table(colData(sce)$Patient, colData(sce)$Tissue)
##            
##               BE BSCJ   ND   NE   NG NSCJ  SMG
##   Patient01    0    0    0 2961  189  153    0
##   Patient02    0    0    0 5611 1280 2014    0
##   Patient03  737 1091    0  749  504    0    0
##   Patient04    0    0    0    0    0 2650    0
##   Patient05    0    0    0 1928    0 1344    0
##   Patient06    0    0    0  733   94 1250    0
##   Patient07 1448  764  570  998  599    0    0
##   Patient08    0    0  806  921  135  798    0
##   Patient09  361  529  545  659  217    0    0
##   Patient10    0    0    0 2103  139  867  821
##   Patient11    0    0    0    0    0    0 1911
##   Patient12    0    0 1653    0    0    0    0
##   Patient13    0    0    0    0  898 2626  383
##   Patient14  494    0    0    0    0    0    0
colSums(table(colData(sce)$Patient, colData(sce)$Tissue))
##    BE  BSCJ    ND    NE    NG  NSCJ   SMG 
##  3040  2384  3574 16663  4055 11702  3115
# Perform differential expression
markers <- findMarkers(sce, groups = colData(sce)$Global_cluster, 
                       block = paste(colData(sce)$Patient, colData(sce)$Tissue, sep = "_"))
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 1
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 2
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 3
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 5
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 6
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 7
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 8
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 9
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 10
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 11
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 15
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 19 and 16
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 25 and 19
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 26 and 15
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 26 and 19
markers.spec <- lapply(markers, function(n){
  if(!is.na(n$Top[1]) & !is.nan(sum(as.matrix(n[1,4:ncol(n)])))){
    test_n <- !is.na(n[1,4:ncol(n)])[1,]
    cur_n <- n[n$FDR < 0.1 & apply(n[,4:ncol(n)], 1, function(x){sum(x[test_n] > 0)}) == sum(test_n),]
    if(nrow(cur_n) > 0){
      cur_n$GeneName <- rowData(sce)$Symbol[match(rownames(cur_n), rowData(sce)$ID)]
    }
  }
  else{
    cur_n <- NULL
  }
  cur_n
})

write.xlsx(markers.spec, paste("~/Dropbox/Postdoc/2019-12-29_BE2020/Results/Marker_genes/Global_filtered/Marker_genes.xlsx", sep = ""))

saveRDS(sce, "~/Dropbox/Postdoc/2019-12-29_BE2020/All_corrected_sce_filtered.rds")

Add tissue clutsering to batch corrected data

all_sce <- readRDS("~/Dropbox/Postdoc/2019-12-29_BE2020/All_corrected_sce_filtered.rds")
tissue_sce <- readRDS("~/Dropbox/Postdoc/2019-12-29_BE2020/Tissue_sce_filtered.rds")

tissue.clusters <- vector(length = ncol(all_sce))
names(tissue.clusters) <- paste(colData(all_sce)$Barcode, 
                                colData(all_sce)$Patient,
                                colData(all_sce)$Tissue, sep = "_")

for(i in 1:length(tissue_sce)){
  tissue.clusters[paste(colData(tissue_sce[[i]])$Barcode, 
                        colData(tissue_sce[[i]])$Patient,
                        colData(tissue_sce[[i]])$Tissue, sep = "_")] <- colData(tissue_sce[[i]])$Tissue_cluster
}

colData(all_sce)$Tissue_cluster <- tissue.clusters

saveRDS(all_sce, "~/Dropbox/Postdoc/2019-12-29_BE2020/All_corrected_sce_filtered.rds")

End Matter

To finish get session info:

sessionInfo()
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Fedora 31 (Workstation Edition)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] destiny_3.0.1               edgeR_3.28.0               
##  [3] limma_3.42.2                dbscan_1.1-5               
##  [5] princurve_2.1.4             dynamicTreeCut_1.63-1      
##  [7] RColorBrewer_1.1-2          umap_0.2.4.1               
##  [9] Rtsne_0.15                  openxlsx_4.1.4             
## [11] DropletUtils_1.6.1          scater_1.14.6              
## [13] ggplot2_3.2.1               scran_1.14.6               
## [15] SingleCellExperiment_1.8.0  SummarizedExperiment_1.16.1
## [17] DelayedArray_0.12.2         BiocParallel_1.20.1        
## [19] matrixStats_0.55.0          Biobase_2.46.0             
## [21] GenomicRanges_1.38.0        GenomeInfoDb_1.22.0        
## [23] IRanges_2.20.2              S4Vectors_0.24.3           
## [25] BiocGenerics_0.32.0         rmarkdown_2.1              
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1             RcppEigen_0.3.3.7.0      igraph_1.2.4.2          
##   [4] lazyeval_0.2.2           sp_1.3-2                 RcppHNSW_0.2.0          
##   [7] digest_0.6.24            htmltools_0.4.0          viridis_0.5.1           
##  [10] magrittr_1.5             R.utils_2.9.2            xts_0.12-0              
##  [13] askpass_1.1              colorspace_1.4-1         rappdirs_0.3.1          
##  [16] haven_2.2.0              xfun_0.12                dplyr_0.8.4             
##  [19] crayon_1.3.4             RCurl_1.98-1.1           jsonlite_1.6.1          
##  [22] hexbin_1.28.1            zoo_1.8-7                glue_1.3.1              
##  [25] gtable_0.3.0             zlibbioc_1.32.0          XVector_0.26.0          
##  [28] car_3.0-6                BiocSingular_1.2.1       Rhdf5lib_1.8.0          
##  [31] DEoptimR_1.0-8           HDF5Array_1.14.2         abind_1.4-5             
##  [34] VIM_5.1.0                scales_1.1.0             ggplot.multistats_1.0.0 
##  [37] ggthemes_4.2.0           Rcpp_1.0.3               viridisLite_0.3.0       
##  [40] laeken_0.5.1             reticulate_1.14          dqrng_0.2.1             
##  [43] foreign_0.8-72           rsvd_1.0.2               proxy_0.4-23            
##  [46] vcd_1.4-5                farver_2.0.3             pkgconfig_2.0.3         
##  [49] R.methodsS3_1.8.0        nnet_7.3-12              locfit_1.5-9.1          
##  [52] labeling_0.3             tidyselect_1.0.0         rlang_0.4.4             
##  [55] munsell_0.5.0            cellranger_1.1.0         tools_3.6.2             
##  [58] ranger_0.12.1            batchelor_1.2.4          evaluate_0.14           
##  [61] stringr_1.4.0            yaml_2.2.1               knitr_1.28              
##  [64] zip_2.0.4                robustbase_0.93-5        purrr_0.3.3             
##  [67] formatR_1.7              R.oo_1.23.0              compiler_3.6.2          
##  [70] beeswarm_0.2.3           curl_4.3                 e1071_1.7-3             
##  [73] smoother_1.1             tibble_2.1.3             statmod_1.4.33          
##  [76] stringi_1.4.5            RSpectra_0.16-0          forcats_0.4.0           
##  [79] lattice_0.20-38          Matrix_1.2-18            vctrs_0.2.2             
##  [82] pillar_1.4.3             lifecycle_0.1.0          lmtest_0.9-37           
##  [85] BiocNeighbors_1.4.1      data.table_1.12.8        bitops_1.0-6            
##  [88] irlba_2.3.3              R6_2.4.1                 pcaMethods_1.78.0       
##  [91] gridExtra_2.3            rio_0.5.16               vipor_0.4.5             
##  [94] codetools_0.2-16         boot_1.3-23              MASS_7.3-51.4           
##  [97] assertthat_0.2.1         rhdf5_2.30.1             openssl_1.4.1           
## [100] withr_2.1.2              GenomeInfoDbData_1.2.2   hms_0.5.3               
## [103] grid_3.6.2               tidyr_1.0.2              class_7.3-15            
## [106] DelayedMatrixStats_1.8.0 carData_3.0-3            TTR_0.23-6              
## [109] scatterplot3d_0.3-41     ggbeeswarm_0.6.0